setwd("~/Dropbox/clado-manuscript/Mikes_MS_Data/")
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
# Load biom file.
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
head(sam.data)
## TreatmentGroup Site Date Description
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## SampleID.1
## C172N1 C172N1
## C172N2 C172N2
## C172N3 C172N3
## C172P1 C172P1
## C172P2 C172P2
## C172P3 C172P3
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
head(sam.data); str(sam.data)
## TreatmentGroup Site Date Description
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## SampleID.1 DateSite
## C172N1 C172N1 172 North
## C172N2 C172N2 172 North
## C172N3 C172N3 172 North
## C172P1 C172P1 172 Point
## C172P2 C172P2 172 Point
## C172P3 C172P3 172 Point
## 'data.frame': 52 obs. of 6 variables:
## $ TreatmentGroup: Factor w/ 2 levels "Early","Late": 1 1 1 1 1 1 1 1 1 1 ...
## $ Site : Factor w/ 3 levels "North","Point",..: 1 1 1 2 2 2 3 3 3 1 ...
## $ Date : Factor w/ 6 levels "172","178","185",..: 1 1 1 1 1 1 1 1 1 2 ...
## $ Description : Factor w/ 52 levels "Sample of day 172 at site North 1",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ SampleID.1 : Factor w/ 52 levels "C172N1","C172N2",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ DateSite : chr "172 North" "172 North" "172 North" "172 Point" ...
sample_data(biom) <- sam.data
biom; sample_data(biom)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 51928 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 51928 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 51928 tips and 51914 internal nodes ]
## Sample Data: [52 samples by 6 sample variables]:
## TreatmentGroup Site Date Description
## C178N1 Early North 178 Sample of day 178 at site North 1
## C178P1 Early Point 178 Sample of day 178 at site Point 1
## C185P2 Early Point 185 Sample of day 185 at site Point 2
## C206N2 Late North 206 Sample of day 206 at site North 2
## C206P1 Late Point 206 Sample of day 206 at site Point 1
## C206P2 Late Point 206 Sample of day 206 at site Point 2
## C214P1 Late Point 214 Sample of day 214 at site Point 1
## C214P2 Late Point 214 Sample of day 214 at site Point 2
## C214P3 Late Point 214 Sample of day 214 at site Point 3
## C214S1 Late South 214 Sample of day 214 at site South 1
## C214S2 Late South 214 Sample of day 214 at site South 2
## C214S3 Late South 214 Sample of day 214 at site South 3
## C185P1 Early Point 185 Sample of day 185 at site Point 1
## C185P3 Early Point 185 Sample of day 185 at site Point 3
## C199P3 Late Point 199 Sample of day 199 at site Point 3
## C199S2 Late South 199 Sample of day 199 at site South 2
## C199S3 Late South 199 Sample of day 199 at site South 3
## C206N1 Late North 206 Sample of day 206 at site North 1
## C178P2 Early Point 178 Sample of day 178 at site Point 2
## C199N3 Late North 199 Sample of day 199 at site North 3
## C206S1 Late South 206 Sample of day 206 at site South 1
## C214N3 Late North 214 Sample of day 214 at site North 3
## C199N2 Late North 199 Sample of day 199 at site North 2
## C206N3 Late North 206 Sample of day 206 at site North 3
## C206S2 Late South 206 Sample of day 206 at site South 2
## C199N1 Late North 199 Sample of day 199 at site North 1
## C199P1 Late Point 199 Sample of day 199 at site Point 1
## C199S1 Late South 199 Sample of day 199 at site South 1
## C214N1 Late North 214 Sample of day 214 at site North 1
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C199P2 Late Point 199 Sample of day 199 at site Point 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172S3 Early South 172 Sample of day 172 at site South 3
## C178S2 Early South 178 Sample of day 178 at site South 2
## C178P3 Early Point 178 Sample of day 178 at site Point 3
## C178S3 Early South 178 Sample of day 178 at site South 3
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172S1 Early South 172 Sample of day 172 at site South 1
## C178N3 Early North 178 Sample of day 178 at site North 3
## C185N2 Early North 185 Sample of day 185 at site North 2
## C185N3 Early North 185 Sample of day 185 at site North 3
## C185S3 Early South 185 Sample of day 185 at site South 3
## C214N2 Late North 214 Sample of day 214 at site North 2
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C185S2 Early South 185 Sample of day 185 at site South 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## C185N1 Early North 185 Sample of day 185 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C178S1 Early South 178 Sample of day 178 at site South 1
## C185S1 Early South 185 Sample of day 185 at site South 1
## C172S2 Early South 172 Sample of day 172 at site South 2
## C178N2 Early North 178 Sample of day 178 at site North 2
## SampleID.1 DateSite
## C178N1 C178N1 178 North
## C178P1 C178P1 178 Point
## C185P2 C185P2 185 Point
## C206N2 C206N2 206 North
## C206P1 C206P1 206 Point
## C206P2 C206P2 206 Point
## C214P1 C214P1 214 Point
## C214P2 C214P2 214 Point
## C214P3 C214P3 214 Point
## C214S1 C214S1 214 South
## C214S2 C214S2 214 South
## C214S3 C214S3 214 South
## C185P1 C185P1 185 Point
## C185P3 C185P3 185 Point
## C199P3 C199P3 199 Point
## C199S2 C199S2 199 South
## C199S3 C199S3 199 South
## C206N1 C206N1 206 North
## C178P2 C178P2 178 Point
## C199N3 C199N3 199 North
## C206S1 C206S1 206 South
## C214N3 C214N3 214 North
## C199N2 C199N2 199 North
## C206N3 C206N3 206 North
## C206S2 C206S2 206 South
## C199N1 C199N1 199 North
## C199P1 C199P1 199 Point
## C199S1 C199S1 199 South
## C214N1 C214N1 214 North
## C172P1 C172P1 172 Point
## C199P2 C199P2 199 Point
## C172N3 C172N3 172 North
## C172S3 C172S3 172 South
## C178S2 C178S2 178 South
## C178P3 C178P3 178 Point
## C178S3 C178S3 178 South
## C172N1 C172N1 172 North
## C172S1 C172S1 172 South
## C178N3 C178N3 178 North
## C185N2 C185N2 185 North
## C185N3 C185N3 185 North
## C185S3 C185S3 185 South
## C214N2 C214N2 214 North
## C172P2 C172P2 172 Point
## C185S2 C185S2 185 South
## C172P3 C172P3 172 Point
## C185N1 C185N1 185 North
## C172N2 C172N2 172 North
## C178S1 C178S1 178 South
## C185S1 C185S1 185 South
## C172S2 C172S2 172 South
## C178N2 C178N2 178 North
head(otu_table(biom))
## OTU Table: [6 taxa and 52 samples]
## taxa are rows
## C178N1 C178P1 C185P2 C206N2 C206P1 C206P2
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 1
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 2 0 0 6 0 4
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 137 8 1 73 8 6
## C214P1 C214P2 C214P3 C214S1 C214S2 C214S3
## New.CleanUp.ReferenceOTU155901 0 0 1 0 1 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 9 4 11 2 1 0
## JQ945994.1.1399 1 0 0 1 0 0
## EF653577.1.1339 0 0 0 4 0 0
## JQ814729.1.1495 11 4 27 40 8 23
## C185P1 C185P3 C199P3 C199S2 C199S3 C206N1
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 1 0 0 0 0
## KC551585.1.1451 3 2 5 0 0 11
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 6 4 2 11 2 56
## C178P2 C199N3 C206S1 C214N3 C199N2 C206N3
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 0 1 4 3 0 3
## JQ945994.1.1399 0 0 2 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 10 23 6 19 5 849
## C206S2 C199N1 C199P1 C199S1 C214N1 C172P1
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 1 0 8 2 5 0
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 14 24 5 5 3 2
## C199P2 C172N3 C172S3 C178S2 C178P3 C178S3
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 2 5 8 0 1 0
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 2 121 31 0 0 5
## C172N1 C172S1 C178N3 C185N2 C185N3 C185S3
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0 0 0
## KC551585.1.1451 0 0 1 0 0 0
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 5 16 4 6 27 4
## C214N2 C172P2 C185S2 C172P3 C185N1 C172N2
## New.CleanUp.ReferenceOTU155901 0 0 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 1 0 0 0
## KC551585.1.1451 3 2 0 2 1 6
## JQ945994.1.1399 0 0 0 0 0 0
## EF653577.1.1339 0 0 0 0 0 0
## JQ814729.1.1495 4 13 1 3 22 21
## C178S1 C185S1 C172S2 C178N2
## New.CleanUp.ReferenceOTU155901 0 0 0 0
## New.CleanUp.ReferenceOTU321320 0 0 0 0
## KC551585.1.1451 0 1 0 3
## JQ945994.1.1399 0 0 3 0
## EF653577.1.1339 0 0 0 0
## JQ814729.1.1495 2 3 21 18
# Custom plotting.
nolegend <- theme(legend.position="none")
readabund <- labs(y="read abundance")
# Normalize by relative abundance.
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Ordination plot.
ordNMDS <- ordinate(biom.relabund, method="NMDS", distance="bray")
## Run 0 stress 0.1164511
## Run 1 stress 0.1164511
## ... Procrustes: rmse 3.272388e-06 max resid 1.660708e-05
## ... Similar to previous best
## Run 2 stress 0.1164511
## ... Procrustes: rmse 2.653511e-06 max resid 1.013281e-05
## ... Similar to previous best
## Run 3 stress 0.1164511
## ... Procrustes: rmse 4.851864e-06 max resid 2.66641e-05
## ... Similar to previous best
## Run 4 stress 0.1164499
## ... New best solution
## ... Procrustes: rmse 0.0002641187 max resid 0.00135479
## ... Similar to previous best
## Run 5 stress 0.1164511
## ... Procrustes: rmse 0.0002637999 max resid 0.001356076
## ... Similar to previous best
## Run 6 stress 0.1164499
## ... Procrustes: rmse 3.215999e-06 max resid 2.066861e-05
## ... Similar to previous best
## Run 7 stress 0.1164499
## ... New best solution
## ... Procrustes: rmse 2.24477e-06 max resid 7.595852e-06
## ... Similar to previous best
## Run 8 stress 0.1164499
## ... Procrustes: rmse 6.655844e-06 max resid 2.331752e-05
## ... Similar to previous best
## Run 9 stress 0.1164511
## ... Procrustes: rmse 0.0002639389 max resid 0.001356566
## ... Similar to previous best
## Run 10 stress 0.1164499
## ... Procrustes: rmse 4.751411e-06 max resid 1.984868e-05
## ... Similar to previous best
## Run 11 stress 0.1164499
## ... New best solution
## ... Procrustes: rmse 1.369096e-06 max resid 5.075076e-06
## ... Similar to previous best
## Run 12 stress 0.1164499
## ... Procrustes: rmse 2.570016e-06 max resid 1.538893e-05
## ... Similar to previous best
## Run 13 stress 0.1164511
## ... Procrustes: rmse 0.0002639619 max resid 0.001357598
## ... Similar to previous best
## Run 14 stress 0.1164511
## ... Procrustes: rmse 0.0002641559 max resid 0.001347027
## ... Similar to previous best
## Run 15 stress 0.1164511
## ... Procrustes: rmse 0.0002639791 max resid 0.001356877
## ... Similar to previous best
## Run 16 stress 0.1164499
## ... Procrustes: rmse 1.673567e-06 max resid 6.01596e-06
## ... Similar to previous best
## Run 17 stress 0.1164511
## ... Procrustes: rmse 0.000264147 max resid 0.001350813
## ... Similar to previous best
## Run 18 stress 0.1164499
## ... Procrustes: rmse 2.231739e-06 max resid 1.00423e-05
## ... Similar to previous best
## Run 19 stress 0.1164499
## ... Procrustes: rmse 1.737791e-06 max resid 4.548443e-06
## ... Similar to previous best
## Run 20 stress 0.1164499
## ... Procrustes: rmse 3.567674e-06 max resid 1.473758e-05
## ... Similar to previous best
## *** Solution reached
ordNMDS.k3 <- ordinate(biom.relabund, method="NMDS", distance="bray", k=3)
## Run 0 stress 0.07869986
## Run 1 stress 0.07870031
## ... Procrustes: rmse 6.12514e-05 max resid 0.0002914955
## ... Similar to previous best
## Run 2 stress 0.08803557
## Run 3 stress 0.08338382
## Run 4 stress 0.08334049
## Run 5 stress 0.08696056
## Run 6 stress 0.07869932
## ... New best solution
## ... Procrustes: rmse 0.0008157553 max resid 0.003206486
## ... Similar to previous best
## Run 7 stress 0.08695914
## Run 8 stress 0.07883936
## ... Procrustes: rmse 0.004642341 max resid 0.02353269
## Run 9 stress 0.07869838
## ... New best solution
## ... Procrustes: rmse 0.0001950192 max resid 0.0006709648
## ... Similar to previous best
## Run 10 stress 0.07869807
## ... New best solution
## ... Procrustes: rmse 0.000218397 max resid 0.0006309467
## ... Similar to previous best
## Run 11 stress 0.07870055
## ... Procrustes: rmse 0.0005644573 max resid 0.001874261
## ... Similar to previous best
## Run 12 stress 0.08337021
## Run 13 stress 0.07869866
## ... Procrustes: rmse 0.0002681414 max resid 0.0007798339
## ... Similar to previous best
## Run 14 stress 0.08338119
## Run 15 stress 0.07869851
## ... Procrustes: rmse 0.0002175794 max resid 0.0005823668
## ... Similar to previous best
## Run 16 stress 0.07870038
## ... Procrustes: rmse 0.0005566319 max resid 0.002503063
## ... Similar to previous best
## Run 17 stress 0.07869835
## ... Procrustes: rmse 0.000196876 max resid 0.0008596606
## ... Similar to previous best
## Run 18 stress 0.0786981
## ... Procrustes: rmse 0.0001248532 max resid 0.0004052929
## ... Similar to previous best
## Run 19 stress 0.08337779
## Run 20 stress 0.07869969
## ... Procrustes: rmse 0.000444327 max resid 0.002138182
## ... Similar to previous best
## *** Solution reached
ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "DateSite") + geom_point(size=3) + geom_polygon(aes(fill=DateSite), alpha=0.7)
ord.k3 + labs(title = "Cladophora, 2014") + theme_bw()

# Ordination plot in black and white.
ord.k3.bw <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", label = "Date") + geom_point(size=2) + geom_polygon(colour="black", size=0.25, fill="white", alpha = 0.2, aes(group=DateSite))
ord.k3.bw + labs(title = "Cladophora, 2014") + theme_bw()

# Facet by Date.
ord.k3.facet1 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site") + geom_point(size=2.5) + facet_wrap(~Date) + geom_polygon(colour="black", size=0.25, fill="white", alpha = 0.2, aes(group=DateSite))
ord.k3.facet1 + labs(title = "Cladophora, 2014") + theme_bw()

# Facet by Site.
ord.k3.facet2 <- plot_ordination(biom.relabund, ordNMDS.k3, color = "Date") + geom_point(size=2.5) + facet_wrap(~Site, ncol = 1) + geom_polygon(colour="black", size=0.25, fill="white", alpha = 0.2, aes(group=DateSite))
ord.k3.facet2 + labs(title = "Cladophora, 2014") + theme_bw()

# Find methanotrophs
methanolist <- read.table(file = "methanos.txt")
methanolist <- as.vector(methanolist$V1)
#
biom.relabund.methanos <- subset_taxa(biom.relabund, Genus %in% as.factor(methanolist))
biom.relabund.methanos
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 567 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 567 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 567 tips and 566 internal nodes ]
head(tax_table(biom.relabund.methanos))
## Taxonomy Table: [6 taxa by 8 taxonomic ranks]:
## Kingdom Phylum
## AB240506.1.1496 "Bacteria" "Proteobacteria"
## KC432108.1.1352 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU247624 "Bacteria" "Proteobacteria"
## AJ422152.1.1406 "Bacteria" "Proteobacteria"
## AB989868.1.1453 "Bacteria" "Proteobacteria"
## AJ422161.1.1420 "Bacteria" "Proteobacteria"
## Class Order
## AB240506.1.1496 "Betaproteobacteria" "Burkholderiales"
## KC432108.1.1352 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU247624 "Betaproteobacteria" "Burkholderiales"
## AJ422152.1.1406 "Betaproteobacteria" "Burkholderiales"
## AB989868.1.1453 "Betaproteobacteria" "Burkholderiales"
## AJ422161.1.1420 "Betaproteobacteria" "Burkholderiales"
## Family Genus Species
## AB240506.1.1496 "Comamonadaceae" "Methylibium" NA
## KC432108.1.1352 "Comamonadaceae" "Methylibium" NA
## New.CleanUp.ReferenceOTU247624 "Comamonadaceae" "Methylibium" NA
## AJ422152.1.1406 "Comamonadaceae" "Methylibium" NA
## AB989868.1.1453 "Comamonadaceae" "Methylibium" NA
## AJ422161.1.1420 "Comamonadaceae" "Methylibium" NA
## Rank1
## AB240506.1.1496 NA
## KC432108.1.1352 NA
## New.CleanUp.ReferenceOTU247624 NA
## AJ422152.1.1406 NA
## AB989868.1.1453 NA
## AJ422161.1.1420 NA
#
relabund.methanos <- psmelt(biom.relabund.methanos)
relabund.methanos.genus <- relabund.methanos%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.methanos.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
##
## Sample GenusAbundance TreatmentGroup Site Date Family
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C178S2 0.1472802 Early South 178 Crenotrichaceae
## 2 C199S1 0.1240842 Late South 199 Crenotrichaceae
## 3 C185S1 0.1272864 Early South 185 Crenotrichaceae
## 4 C199S3 0.1048677 Late South 199 Crenotrichaceae
## 5 C185S2 0.1128401 Early South 185 Crenotrichaceae
## 6 C178S1 0.1214166 Early South 178 Crenotrichaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
#
p <- ggplot(relabund.methanos.genus, aes(as.factor(Date), GenusAbundance, color = Site))
p <- p + geom_point() + facet_wrap(~Genus, scales="free_y")
p

# Stacked bar plots of methanotrophs.
barstack.methanos <- plot_bar(biom.relabund.methanos, x = "Date", fill="Site") + facet_wrap(~Genus, scales="free_y")
barstack.methanos

relabund.methanos.genus.est <- summarySE(relabund.methanos.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.methanos.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Crenothrix 3 8.092387e-04 4.823472e-04 2.784833e-04
## 2 North 172 Methylibium 3 6.600147e-04 3.800437e-04 2.194183e-04
## 3 North 172 Methylocaldum 3 7.717447e-05 5.988775e-05 3.457621e-05
## 4 North 172 Methylomicrobium 3 0.000000e+00 0.000000e+00 0.000000e+00
## 5 North 172 Methylomonas 3 1.097031e-04 2.736128e-06 1.579704e-06
## 6 North 172 Methylosinus 3 4.677898e-05 2.432451e-05 1.404376e-05
## ci
## 1 1.198217e-03
## 2 9.440809e-04
## 3 1.487694e-04
## 4 0.000000e+00
## 5 6.796919e-06
## 6 6.042543e-05
relabund.methanos.genus.est$Date <- as.character(relabund.methanos.genus.est$Date)
relabund.methanos.genus.est$Date <- as.numeric(relabund.methanos.genus.est$Date)
p <- ggplot(relabund.methanos.genus.est, aes(x=Date, y=GenusAbundance)) + geom_point() + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + geom_smooth(method="lm", se=TRUE) + facet_wrap(~Genus~Site, ncol = 3, scales="free_y")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# https://joey711.github.io/phyloseq/plot_richness-examples.html
# Plot richness.
biom.rich <- plot_richness(biom, x="Date", color="Site", measures=c("Observed", "Chao1", "Shannon"))
biom.rich + geom_point(size=2, alpha=0.6)

biom.est.rich <- estimate_richness(biom, measures = NULL)
biom.est.rich$SampleID.1 <- row.names(biom.est.rich)
biom.est.rich <- merge(biom.est.rich, sam.data, by = "SampleID.1")
biom.est.rich$Date <- as.character(biom.est.rich$Date)
biom.est.rich$Date <- as.numeric(biom.est.rich$Date)
biom.est.rich.obs <- summarySE(biom.est.rich, measurevar="Observed", groupvars=c("Date","Site"))
str(biom.est.rich.obs)
## 'data.frame': 18 obs. of 7 variables:
## $ Date : num 172 172 172 178 178 178 185 185 185 199 ...
## $ Site : Factor w/ 3 levels "North","Point",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ N : num 3 3 3 3 3 3 3 3 3 3 ...
## $ Observed: num 5755 3840 4230 4371 4199 ...
## $ sd : num 984 952 161 688 376 ...
## $ se : num 568.2 549.4 93.1 397.2 217.2 ...
## $ ci : num 2445 2364 400 1709 934 ...
p.obs <- ggplot(biom.est.rich.obs, aes(x=Date, y=Observed)) + geom_point() + geom_errorbar(aes(ymin=Observed-se, ymax=Observed+se)) + geom_smooth(method="lm",se=TRUE) + facet_grid(~ Site)
p.obs + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

biom.est.rich.sha <- summarySE(biom.est.rich, measurevar="Shannon", groupvars=c("Date","Site")); biom.est.rich.sha
## Date Site N Shannon sd se ci
## 1 172 North 3 5.691512 0.27015157 0.155972080 0.67109370
## 2 172 Point 3 4.994391 0.27100848 0.156466818 0.67322238
## 3 172 South 3 5.289918 0.05771106 0.033319494 0.14336221
## 4 178 North 3 5.239205 0.06234512 0.035994972 0.15487386
## 5 178 Point 3 5.189739 0.05970346 0.034469806 0.14831161
## 6 178 South 3 5.275200 0.03777686 0.021810483 0.09384293
## 7 185 North 3 5.278452 0.08534700 0.049275111 0.21201369
## 8 185 Point 3 5.379744 0.14580223 0.084178957 0.36219282
## 9 185 South 3 5.395927 0.01475526 0.008518956 0.03665411
## 10 199 North 3 5.464040 0.10460268 0.060392385 0.25984746
## 11 199 Point 3 5.555742 0.04974626 0.028721015 0.12357655
## 12 199 South 3 5.709314 0.06839587 0.039488375 0.16990477
## 13 206 North 3 5.509243 0.63523457 0.366752847 1.57801014
## 14 206 Point 2 5.777171 0.07811922 0.055238633 0.70187338
## 15 206 South 2 6.029796 0.10170518 0.071916421 0.91378477
## 16 214 North 3 5.606372 0.04046684 0.023363538 0.10052519
## 17 214 Point 3 5.740453 0.03729225 0.021530691 0.09263909
## 18 214 South 3 6.435686 0.05378621 0.031053481 0.13361234
p.sha <- ggplot(biom.est.rich.sha, aes(x=Date, y=Shannon)) + geom_point() + geom_errorbar(aes(ymin=Shannon-se, ymax=Shannon+se)) + geom_smooth(method="lm",se=TRUE) + facet_grid(~ Site)
p.sha + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

biom.est.rich.ch1 <- summarySE(biom.est.rich, measurevar="Chao1", groupvars=c("Date","Site")); biom.est.rich.ch1
## Date Site N Chao1 sd se ci
## 1 172 North 3 7429.346 935.0547 539.85409 2322.8047
## 2 172 Point 3 5666.254 1199.0783 692.28819 2978.6757
## 3 172 South 3 6360.755 398.6771 230.17633 990.3688
## 4 178 North 3 6700.207 1222.0559 705.55428 3035.7551
## 5 178 Point 3 6664.523 453.2735 261.69757 1125.9938
## 6 178 South 3 6917.707 230.6817 133.18414 573.0451
## 7 185 North 3 6346.119 309.1132 178.46659 767.8797
## 8 185 Point 3 6802.057 493.3480 284.83460 1225.5444
## 9 185 South 3 6394.629 463.1020 267.37204 1150.4091
## 10 199 North 3 6660.997 383.5766 221.45806 952.8571
## 11 199 Point 3 7173.874 299.3104 172.80691 743.5281
## 12 199 South 3 7870.023 384.8040 222.16671 955.9062
## 13 206 North 3 8351.495 741.9579 428.36959 1843.1256
## 14 206 Point 2 8458.037 691.5015 488.96543 6212.8949
## 15 206 South 2 8495.197 800.6637 566.15470 7193.6776
## 16 214 North 3 7347.579 1190.2184 687.17294 2956.6665
## 17 214 Point 3 7762.503 126.9796 73.31169 315.4347
## 18 214 South 3 8723.704 1065.9220 615.41037 2647.8971
p.ch1 <- ggplot(biom.est.rich.ch1, aes(x=Date, y=Chao1)) + geom_point() + geom_errorbar(aes(ymin=Chao1-se, ymax=Chao1+se)) + geom_smooth(method="lm",se=TRUE) + facet_grid(~ Site)
p.ch1 + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

library("vegan"); packageVersion("vegan")
## [1] '2.4.1'
# Anosim by site.
site_group = get_variable(biom, "Site")
site_ano = anosim(phyloseq::distance(biom, "bray"), site_group)
site_ano
##
## Call:
## anosim(dat = phyloseq::distance(biom, "bray"), grouping = site_group)
## Dissimilarity: bray
##
## ANOSIM statistic R: 0.3419
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
# Anosim by date.
date_group = get_variable(biom, "Date")
date_ano = anosim(phyloseq::distance(biom, "bray"), date_group)
date_ano
##
## Call:
## anosim(dat = phyloseq::distance(biom, "bray"), grouping = date_group)
## Dissimilarity: bray
##
## ANOSIM statistic R: 0.6204
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
#
df = as(sample_data(biom), "data.frame")
d = phyloseq::distance(biom, "bray")
clado.adonis = adonis(d ~ Date + Site + Date*Site, df)
clado.adonis
##
## Call:
## adonis(formula = d ~ Date + Site + Date * Site, data = df)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Date 5 3.1945 0.63890 17.4143 0.41338 0.001 ***
## Site 2 1.5561 0.77804 21.2068 0.20136 0.001 ***
## Date:Site 10 1.7298 0.17298 4.7148 0.22384 0.001 ***
## Residuals 34 1.2474 0.03669 0.16142
## Total 51 7.7278 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
top.genera = sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Genus"], sum), TRUE)
top.genera = top.genera[1:25]
# Prune to just the most-abundant genera.
biom.relabund.subset = subset_taxa(biom.relabund, Genus %in% names(top.genera))
get_taxa_unique(biom.relabund.subset, "Genus")
## [1] "Hydrogenophaga" "Cellvibrio" "Pseudomonas"
## [4] "Crenothrix" "Methylotenera" "Dechloromonas"
## [7] "Lysobacter" "Hyphomicrobium" "Rhodobacter"
## [10] "Planctomyces" "Pseudanabaena" "Leptolyngbya"
## [13] "Synechococcus" "Bdellovibrio" "Armatimonas"
## [16] "Sediminibacterium" "Flavobacterium" "Fluviicola"
## [19] "Flectobacillus" "Haliscomenobacter" "Leadbetterella"
## [22] "Emticicia" "Hyphomonas" "Meiothermus"
## [25] "CM44"
# Find top phyla.
top.genera.list <- as.vector(names(top.genera))
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Genus %in% as.factor(top.genera.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8747 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 8747 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 8747 tips and 8743 internal nodes ]
head(tax_table(biom.relabund.subset.taxa))
## Taxonomy Table: [6 taxa by 8 taxonomic ranks]:
## Kingdom Phylum
## New.CleanUp.ReferenceOTU269444 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU117342 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU139696 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU449979 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU112788 "Bacteria" "Proteobacteria"
## New.CleanUp.ReferenceOTU455978 "Bacteria" "Proteobacteria"
## Class Order
## New.CleanUp.ReferenceOTU269444 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU117342 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU139696 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU449979 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU112788 "Betaproteobacteria" "Burkholderiales"
## New.CleanUp.ReferenceOTU455978 "Betaproteobacteria" "Burkholderiales"
## Family Genus Species
## New.CleanUp.ReferenceOTU269444 "Comamonadaceae" "Hydrogenophaga" NA
## New.CleanUp.ReferenceOTU117342 "Comamonadaceae" "Hydrogenophaga" NA
## New.CleanUp.ReferenceOTU139696 "Comamonadaceae" "Hydrogenophaga" NA
## New.CleanUp.ReferenceOTU449979 "Comamonadaceae" "Hydrogenophaga" NA
## New.CleanUp.ReferenceOTU112788 "Comamonadaceae" "Hydrogenophaga" NA
## New.CleanUp.ReferenceOTU455978 "Comamonadaceae" "Hydrogenophaga" NA
## Rank1
## New.CleanUp.ReferenceOTU269444 NA
## New.CleanUp.ReferenceOTU117342 NA
## New.CleanUp.ReferenceOTU139696 NA
## New.CleanUp.ReferenceOTU449979 NA
## New.CleanUp.ReferenceOTU112788 NA
## New.CleanUp.ReferenceOTU455978 NA
relabund.top.genera <- psmelt(biom.relabund.subset.taxa)
relabund.top.genera.genus <- relabund.top.genera%>%
group_by(Sample, Genus)%>%
mutate(GenusAbundance = sum(Abundance))%>%
distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.top.genera.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
##
## Sample GenusAbundance TreatmentGroup Site Date Family
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C178S2 0.14728018 Early South 178 Crenotrichaceae
## 2 C206N2 0.07541493 Late North 206 Thermaceae
## 3 C199S1 0.12408425 Late South 199 Crenotrichaceae
## 4 C185S1 0.12728636 Early South 185 Crenotrichaceae
## 5 C199S3 0.10486769 Late South 199 Crenotrichaceae
## 6 C172S1 0.10894857 Early South 172 Cytophagaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Plot.
relabund.top.genera.genus.est <- summarySE(relabund.top.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.top.genera.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Armatimonas 3 0.0082179013 0.0037601213 0.0021709070
## 2 North 172 Bdellovibrio 3 0.0023540936 0.0001963925 0.0001133873
## 3 North 172 Cellvibrio 3 0.0058399987 0.0066066072 0.0038143264
## 4 North 172 CM44 3 0.0088329518 0.0005298073 0.0003058844
## 5 North 172 Crenothrix 3 0.0008092387 0.0004823472 0.0002784833
## 6 North 172 Dechloromonas 3 0.0019270556 0.0026318679 0.0015195096
## ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0164117220
## 4 0.0013161143
## 5 0.0011982168
## 6 0.0065379223
relabund.top.genera.genus.est$Date <- as.character(relabund.top.genera.genus.est$Date)
relabund.top.genera.genus.est$Date <- as.numeric(relabund.top.genera.genus.est$Date)
p <- ggplot(relabund.top.genera.genus.est, aes(x=Date, y=GenusAbundance)) + geom_point() + geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) + geom_smooth(method="lm", se=TRUE) + facet_wrap(~Genus~Site, ncol = 3, scales="free_y")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))
